# devtools::install_local("geneRefineR/", force = T)
# library("echolocatoR") 
source("echolocatoR/R/functions.R") 

library(readxl)
library(DT)
library(data.table)
library(dplyr)
library(ggplot2)
library(plotly)
library(cowplot)
library(ggrepel)
library(curl)
library(biomaRt) 
# Ensembl LD API
library(httr)
library(jsonlite)
library(xml2)
library(gaston)
library(RCurl)
# *** susieR ****
# library(knitrBootstrap) #install_github('jimhester/knitrBootstrap')
library(susieR) # devtools::install_github("stephenslab/susieR")

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] susieR_0.6.2.0411  tidyr_0.8.1        RCurl_1.95-4.11   
##  [4] bitops_1.0-6       gaston_1.5.4       RcppParallel_4.4.2
##  [7] Rcpp_1.0.0         xml2_1.2.0         jsonlite_1.5      
## [10] httr_1.3.1         biomaRt_2.38.0     curl_3.2          
## [13] ggrepel_0.8.0      cowplot_0.9.3      plotly_4.7.1      
## [16] ggplot2_3.1.0      dplyr_0.7.6        data.table_1.11.8 
## [19] DT_0.4             readxl_1.1.0      
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-35      prettyunits_1.0.2    assertthat_0.2.0    
##  [4] rprojroot_1.3-2      digest_0.6.18        R6_2.4.0            
##  [7] cellranger_1.1.0     plyr_1.8.4           backports_1.1.2     
## [10] stats4_3.5.1         RSQLite_2.1.1        evaluate_0.11       
## [13] pillar_1.3.1         rlang_0.3.1          progress_1.2.0      
## [16] lazyeval_0.2.1       blob_1.1.1           S4Vectors_0.20.1    
## [19] Matrix_1.2-14        rmarkdown_1.10       stringr_1.4.0       
## [22] htmlwidgets_1.2      bit_1.1-14           munsell_0.5.0       
## [25] compiler_3.5.1       pkgconfig_2.0.2      BiocGenerics_0.28.0 
## [28] htmltools_0.3.6      tidyselect_0.2.4     expm_0.999-2        
## [31] tibble_2.0.1         IRanges_2.16.0       XML_3.98-1.12       
## [34] viridisLite_0.3.0    crayon_1.3.4         withr_2.1.2         
## [37] grid_3.5.1           gtable_0.2.0         DBI_1.0.0           
## [40] magrittr_1.5         scales_1.0.0         stringi_1.3.1       
## [43] bindrcpp_0.2.2       tools_3.5.1          bit64_0.9-7         
## [46] Biobase_2.42.0       glue_1.3.0           purrr_0.2.5         
## [49] hms_0.4.2            parallel_3.5.1       yaml_2.1.19         
## [52] AnnotationDbi_1.44.0 colorspace_1.3-2     memoise_1.1.0       
## [55] knitr_1.20           bindr_0.1.1
print(paste("susieR ", packageVersion("susieR")))  
## [1] "susieR  0.6.2.411"
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS:
# root <- "../../.."
root <- "~/../../../sc/orga/projects"
Data_dirs = list(
  # ++++++++ GWAS SUMMARY STATS ++++++++ # 
  # Nall et al. (2019) w/ 23andMe
  "Nalls_23andMe" = list(type="Parkinson's GWAS", 
                         topSS="Data/Parkinsons/Nalls_23andMe/Nalls2019_TableS2.xlsx",
                   # fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/23andme/PD_all_post30APRIL2015_5.2_extended.txt")),
                   fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/combined_meta/nallsEtAl2019_allSamples_allVariants.mod.txt"),
                   reference="https://www.biorxiv.org/content/10.1101/388165v3"),  
  
  ## IGAP 
  "Lambert_2013" = list(type="Alzheimer's GWAS", 
                topSS="Data/Alzheimers/Lambert_2013/Lambert_2019_AD_GWAS.xlsx",
                fullSS=file.path(root,"ad-omics/data/AD_GWAS/Lambert_2013/IGAP_stage_1.1000G.phase3.20130502.tsv"),
                reference="https://www.nature.com/articles/ng.2802"),
  
  ## Marioni et al. (2018) 
  "Marioni_2018" = list(type="Alzheimer's GWAS", 
                        topSS="Data/Alzheimers/Marioni_2018/Marioni2018_supplementary_tables.xlsm",
                        fullSS=file.path(root,"ad-omics/data/AD_GWAS/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv"),
                        reference="https://www.nature.com/articles/s41398-018-0150-6"),
  
  ## Jansen et al. (2018) 
  "Posthuma_2018" = list(type="Alzheimer's GWAS", 
                         topSS="Data/Alzheimers/Posthuma_2018/Posthuma2018_suppTables.xlsx", 
                         fullSS=file.path(root,"ad-omics/data/AD_GWAS/Posthuma_2018/phase3.beta.se.hrc.txt"),
                reference="https://www.nature.com/articles/s41588-018-0311-9"),
  
  ## Kunkle et al. (2018) Alzheimer's GWAS 
  "Kunkle_2019" = list(type="Alzheimer's GWAS",
                       topSS="Data/Alzheimers/Kunkle_2019/Kunkle2019_supplementary_tables.xlsx", 
                       fullSS=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_2019/Kunkle_etal_Stage1_results.txt"),
                       # fullSS_stage2=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_etal_Stage2_results.txt"),
                       reference="https://www.nature.com/articles/s41588-019-0358-2"),

  # ++++++++ eQTL SUMMARY STATS ++++++++ #
  ## MESA eQTLs: African Americans
  "MESA_AFA" = list(type="eQTL",
                    topSS="Data/eQTL/MESA/AFA_eQTL_PTK2B.txt",
                    fullSS=file.path(root,"ad-omics/data/mesa/AFA_cis_eqtl_summary_statistics.txt"), 
                    reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
  ## MESA eQTLs: Caucasians
  "MESA_CAU" = list(type="eQTL",
                    topSS="Data/eQTL/MESA/CAU_eQTL_PTK2B.txt",
                    fullSS=file.path(root,"ad-omics/data/mesa/CAU_cis_eqtl_summary_statistics.txt"),
                    reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
  ## MESA eQTLs: Hispanics
  "MESA_HIS" = list(type="eQTL",
                    topSS="Data/eQTL/MESA/HIS_eQTL_PTK2B.txt",
                    fullSS=file.path(root,"ad-omics/data/mesa/HIS_cis_eqtl_summary_statistics.txt"),
                    reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa") 
  )

Data_dirs_table <- Data_dirs_to_table(Data_dirs, writeCSV = "Results/directories_table.csv")

# Direct to local data files if not working on Minerva
if(getwd()=="/Users/schilder/Desktop/Fine_Mapping"){
  vcf_folder = F # Use internet if I'm working from my laptop
} else{
  vcf_folder = F#"../1000_Genomes_VCFs/Phase1" # Use previously downloaded files if working on Minerva node
}  

force_new_subset = F
allResults <- list()

Parkinson’s Disease

Nalls et. al. (2019) w/ 23andMe

Rare coding variant burden analysis:
We performed kernel-based burden tests on the 113 genes in our PD GWAS loci that contained two or more rare coding variants (MAF< 5% or MAF < 1%). After Bonferroni correction for 113 genes, we identified 7 significant putatively causal genes: - LRRK2, GBA, CATSPER3 (rs11950533/C5orf24 locus) - LAMB2 (rs12497850/IP6K2 locus) - LOC442028 (rs2042477/KCNIP3 locus) - NFKB2 (rs10748818/GBF1 locus), and SCARB2 (rs6825004 locus).”

– from Nalls et al. (2019)

dataset_name <- "Nalls_23andMe"

top_SNPs <- import_topSNPs(
  file_path = Data_dirs[[dataset_name]]$topSS,
  chrom_col = "CHR", position_col = "BP", snp_col="SNP",
  pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
  caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats")

gene_list <- c("LRRK2","SNCA","VPS13C","C5orf24","IP6K2","KCNIP3","GBF1","SCARB2")#, top_SNPs$Gene) %>% unique()
 
allResults[[dataset_name]] <- finemap_geneList(top_SNPs,
                                               geneList=gene_list,
                 file_path= Data_dirs[[dataset_name]]$fullSS,#"./LRRK2_EUR_subset.txt",#
                 chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
                 pval_col = "p", effect_col = "beta", stderr_col = "se", 
                 vcf_folder = vcf_folder, superpopulation = "EUR", force_new_subset = force_new_subset) 

LRRK2

  • Extracting SNPs flanking lead SNP… —Min snp position: 40234202 — —Max snp position: 41234202 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/LRRK2_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-4819.31771667441” [1] “objective:-4817.53694272212” [1] “objective:-4817.5346354578” [1] “objective:-4817.53463247054” [1] “objective:-4817.5346324668” [1] “objective:-4817.53463246679”

Credible Set:

****** 5 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

SNCA

  • Extracting SNPs flanking lead SNP… —Min snp position: 90126111 — —Max snp position: 91126111 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/SNCA_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/SNCA_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-3916.13477060794” [1] “objective:-3885.12408928578” [1] “objective:-3885.10414431053” [1] “objective:-3885.10413226354” [1] “objective:-3885.10413225772” [1] “objective:-3885.10413225772”

Credible Set:

****** 1 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

VPS13C

  • Extracting SNPs flanking lead SNP… —Min snp position: 61497385 — —Max snp position: 62497385 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/VPS13C_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/VPS13C_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-3963.78730870254” [1] “objective:-3963.32612301913” [1] “objective:-3963.3257570866” [1] “objective:-3963.32575679555”

Credible Set:

****** 8 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

C5orf24

  • Extracting SNPs flanking lead SNP… —Min snp position: 133699105 — —Max snp position: 134699105 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/C5orf24_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/C5orf24_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-2730.39327779333” [1] “objective:-2730.27173484353” [1] “objective:-2730.27159727042” [1] “objective:-2730.27159711459”

Credible Set:

****** 37 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

IP6K2

  • Extracting SNPs flanking lead SNP… —Min snp position: 48248989 — —Max snp position: 49248989 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/IP6K2_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/IP6K2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-1732.44890838796” [1] “objective:-1732.18758589222” [1] “objective:-1732.18663923239” [1] “objective:-1732.18663574514”

Credible Set:

— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******

## Warning: Removed 1 rows containing missing values (geom_hline).

KCNIP3

  • Extracting SNPs flanking lead SNP… —Min snp position: 95500943 — —Max snp position: 96500943 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/KCNIP3_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/KCNIP3_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-1045.5376558212” [1] “objective:-1045.2548831746” [1] “objective:-1045.25378394202” [1] “objective:-1045.25377968046” [1] “objective:-1045.25377966438” [1] “objective:-1045.25377966432”

Credible Set:

****** 58 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

GBF1

  • Extracting SNPs flanking lead SNP… —Min snp position: 103515279 — —Max snp position: 104515279 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/GBF1_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/GBF1_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-2327.33554736521” [1] “objective:-2327.14720605155” [1] “objective:-2327.14644032417” [1] “objective:-2327.14643727145”

Credible Set:

— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******

## Warning: Removed 1 rows containing missing values (geom_hline).

SCARB2

  • Extracting SNPs flanking lead SNP… —Min snp position: 76610365 — —Max snp position: 77610365 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/SCARB2_combined_meta_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/SCARB2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-4438.64199045971” [1] “objective:-4438.05391400102” [1] “objective:-4438.05352828151” [1] “objective:-4438.05352803242”

Credible Set:

****** 4 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 207 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 207 rows containing missing values (geom_segment).
## Warning: Removed 4 rows containing missing values (geom_text_repel).

#c("LRRK2","SNCA","VPS13C","GCH1"),
# "GBA","CATSPER3", "LAMB2", "LOC442028", "NFKB2", "SCARB2", "GRN"

Alzheimer’s Disease

Posthuma (2018)

dataset_name <- "Posthuma_2018"

top_SNPs <- import_topSNPs(
  file_path = Data_dirs[[dataset_name]]$topSS,
  sheet = "Table S8",
  chrom_col = "Chr", position_col = "bp", snp_col="SNP",
  pval_col="P", effect_col="Zscore", gene_col="nearestGene",
  caption= "Posthuma et al. (2018) AD GWAS Summary Stats")

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
                 file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Posthuma_2018/phase3.beta.se.hrc.txt",#
                 effect_col = "BETA", stderr_col = "SE", position_col = "BP",
                 snp_col = "SNP", pval_col = "P",
                 vcf_folder = vcf_folder, superpopulation = "EUR",
                 minPos=NULL, maxPos=27440000, force_new_subset = force_new_subset)

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26695121 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Posthuma_2018/PTK2B_Posthuma_2018_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-2946.7022383842” [1] “objective:-2946.45848339141” [1] “objective:-2946.45830481344” [1] “objective:-2946.45830468346”

Credible Set:

****** 6 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

Marioni (2018)

dataset_name <- "Marioni_2018"

top_SNPs <- import_topSNPs(
  file_path = Data_dirs[[dataset_name]]$topSS,
  sheet = "Table S9",
  chrom_col = "topSNP_chr", position_col = "topSNP_bp", snp_col="topSNP",
  pval_col="p_GWAS", effect_col="b_GWAS", gene_col="Gene",
  caption= "Marioni et al. (2018) AD GWAS Summary Stats")

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
                 file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv", #,
                 effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                 snp_col = "SNP",chrom_col = "CHROM", pval_col = "PVAL",
                 vcf_folder = vcf_folder, superpopulation = "EUR",
                 minPos=NULL, maxPos=27440000, force_new_subset = force_new_subset)

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26688518 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Marioni_2018/PTK2B_Marioni_2018_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-1966.3994025023” [1] “objective:-1965.70884939607” [1] “objective:-1965.70807253082” [1] “objective:-1965.70807165376”

Credible Set:

****** 5 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 354 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 354 rows containing missing values (geom_segment).
## Warning: Removed 5 rows containing missing values (geom_text_repel).

Lambert (2013) (IGAP)

Filtering out the CLU portion of the locus prevents susieR from identifying a credible set.

dataset_name <- "Lambert_2013"

top_SNPs <- import_topSNPs(
  file_path = Data_dirs[[dataset_name]]$topSS,
  sheet = 1,
  chrom_col = "CHR", position_col = "Position", snp_col="SNP",
  pval_col="Overall_P", effect_col="Overall_OR", gene_col="Closest gene",
  caption= "Lambert (2013) (IGAP): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"), 
                   file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Lambert_2013/IGAP_stage_1.1000G.phase3.20130502.tsv",#
                   chrom_col = "CHROM",  pval_col = "PVAL", snp_col = "SNP",
                   effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                   vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR",
                   minPos=NULL, maxPos=27440000, force_new_subset = force_new_subset)  

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26695121 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Lambert_2013/PTK2B_Lambert_2013_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-1875.33189392468” [1] “objective:-1875.12745540191” [1] “objective:-1875.12685678814” [1] “objective:-1875.12685505843”

Credible Set:

— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******

## Warning: Removed 1 rows containing missing values (geom_hline).

Kunkle (2019) (IGAP)

Exclude CLU-associated portion of locus by removing all SNPs over position 8:27440000.
The CLU-gene technically starts at 8:27456253, but its associated SNPs are further upstream.

dataset_name <- "Kunkle_2019"

top_SNPs <- import_topSNPs(
  file_path = Data_dirs[[dataset_name]]$topSS,
  sheet = "Supplementary Table 6",
  chrom_col = "Chr", position_col = "Basepair", snp_col="SNP",
  pval_col="P", effect_col="Beta", gene_col="LOCUS",
  caption= "Kunkle (2019) (IGAP): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

allResults[[dataset_name]] <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),  
                   file_path=Data_dirs[[dataset_name]]$fullSS,#"Data/Alzheimers/Kunkle_2019/Kunkle_etal_Stage1_results.txt",#
                   chrom_col = "Chromosome",  position_col = "Position", pval_col = "Pvalue",
                   snp_col = "MarkerName", effect_col = "Beta", stderr_col = "SE", 
                   vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR",
                   minPos=NULL, maxPos=27440000, file_sep=" ", force_new_subset = force_new_subset) # Exclude CLU-associated portion of locus

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26719987 — —Max snp position: 27440000 — Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Kunkle_2019/PTK2B_Kunkle_2019_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-2968.55924360798” [1] “objective:-2968.40951319364” [1] “objective:-2968.40927818658” [1] “objective:-2968.40927781”

Credible Set:

****** 5 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

eQTL

  • AFR = AFA = African
  • AMR = Ad Mixed American
  • EAS = East Asian
  • EUR = CAU = European
  • SAS = South Asian
  • HIS = Hispanic

MESA - AFA

dataset_name <- "MESA_AFA"
superpopulation <- "AFA"
allResults[[dataset_name]] <- finemap_eQTL(superpopulation, gene =  "PTK2B", 
                                         fullSS_path = Data_dirs[[dataset_name]]$fullSS, 
                                         force_new_subset = force_new_subset)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AFR_subset.txt …

## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26695121 — —Max snp position: 27695121 — Extracting relevant variants from fullSS…

awk -F " " ‘NR==1{print $0}{ if(($7 == 8) && ($12 >= 26695121 && $12 <= 27695121)) { print } }’ Data/eQTL/MESA/PTK2B_AFR_subset.txt > Data/eQTL/MESA/PTK2B_MESA_subset.txt Extraction completed in 0.09 seconds Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-4452.41334407999” [1] “objective:-4452.272858439” [1] “objective:-4452.27277053595” [1] “objective:-4452.27277048098”

Credible Set:

****** 2 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

MESA - CAU

dataset_name <- "MESA_CAU"
superpopulation <- "CAU"
allResults[[dataset_name]] <- finemap_eQTL(superpopulation, gene = "PTK2B", 
                                         fullSS_path = Data_dirs[[dataset_name]]$fullSS, 
                                         force_new_subset = force_new_subset)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_EUR_subset.txt …

## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26695121 — —Max snp position: 27695121 — Extracting relevant variants from fullSS…

awk -F " " ‘NR==1{print $0}{ if(($7 == 8) && ($12 >= 26695121 && $12 <= 27695121)) { print } }’ Data/eQTL/MESA/PTK2B_EUR_subset.txt > Data/eQTL/MESA/PTK2B_MESA_subset.txt Extraction completed in 0.06 seconds Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-2439.61831759579” [1] “objective:-2438.71996233951” [1] “objective:-2438.71907514181” [1] “objective:-2438.71907425949”

Credible Set:

****** 5 SNPs included in Credible Set ******

## Warning: Removed 1 rows containing missing values (geom_hline).

MESA - HIS

Used the Ad Mixed American (AMR) reference panel given the absence of a Hispanic reference panel for 1KG_Phase1.

dataset_name <- "MESA_HIS"
superpopulation <- "HIS"
allResults[[dataset_name]] <- finemap_eQTL(superpopulation, gene = "PTK2B", 
                                       fullSS_path = Data_dirs[[dataset_name]]$fullSS, 
                                       force_new_subset = force_new_subset)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AMR_subset.txt …

## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored

PTK2B

  • Extracting SNPs flanking lead SNP… —Min snp position: 26719987 — —Max snp position: 27719987 — Extracting relevant variants from fullSS…

awk -F " " ‘NR==1{print $0}{ if(($7 == 8) && ($12 >= 26719987 && $12 <= 27719987)) { print } }’ Data/eQTL/MESA/PTK2B_AMR_subset.txt > Data/eQTL/MESA/PTK2B_MESA_subset.txt Extraction completed in 0.08 seconds Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-3444.59629242866” [1] “objective:-3444.46011107977” [1] “objective:-3444.45983730976” [1] “objective:-3444.4598367586”

Credible Set:

— ERROR — ****** Could NOT identify credible set. Default to SNPs with the top 5 PIPs ******

## Warning: Removed 1 rows containing missing values (geom_hline).

Merge Results

merged_results <- merge_finemapping_results(allResults, csv_path ="Results/merged_finemapping_results.csv")
createDT(merged_results, caption = "Finemapped Credible Sets for Multiple Datasets")
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# table(as.character(merged_results$SNP))